import scrapy
from ..items import HeartItem

class HeartSpider(scrapy.Spider):
    """Spider that follows article links on a listing page and scrapes each one.

    Starting from the URLs in ``start_urls``, ``parse`` collects article links
    from the listing page and ``parse_article`` turns every article page into
    a ``HeartItem`` with ``title``, ``date``, ``source``, ``author`` and
    ``content`` fields.
    """

    name = "new_article"
    # Offsite filtering is not enabled for this spider. To restrict the crawl,
    # set ``allowed_domains = ["heart.dxy.cn"]`` -- note the attribute Scrapy
    # reads is ``allowed_domains``, not ``allow_domains``.
    start_urls = ["http://heart.dxy.cn/tag/news"]

    def parse(self, response):
        """Yield one request per article link found on the listing page.

        Args:
            response: The downloaded listing page.

        Yields:
            scrapy.Request: A request for each article, handled by
            ``parse_article``.
        """
        hrefs = response.xpath(
            "//p[contains(@class,'title')]/a[contains(@class,'h4')]/@href"
        ).extract()
        for href in hrefs:
            # Resolve against the page URL: scrapy.Request rejects relative
            # URLs, while absolute hrefs pass through urljoin unchanged.
            yield scrapy.Request(response.urljoin(href), callback=self.parse_article)

    def parse_article(self, response):
        """Build a ``HeartItem`` from a single article page.

        The text of the first three ``<span>`` children of the ``sum`` block
        is taken, in order, as ``date``, ``source`` and ``author``. If fewer
        than three spans are present the missing fields are set to an empty
        string and a warning is logged, so the article is still emitted
        instead of being lost to an ``IndexError``.

        Args:
            response: The downloaded article page.

        Yields:
            HeartItem: The populated item; ``title`` and ``content`` are lists
            of text fragments, the other fields are stripped strings.
        """
        # Evaluate the metadata XPath once instead of once per field.
        summary = [
            text.strip()
            for text in response.xpath(
                "//div[contains(@class,'sum')]/span/text()"
            ).extract()
        ]
        if len(summary) < 3:
            self.logger.warning(
                "Expected 3 metadata spans but found %d on %s",
                len(summary),
                response.url,
            )
            summary += [""] * (3 - len(summary))

        article = HeartItem()
        article["title"] = response.xpath("//h1/text()").extract()
        article["date"], article["source"], article["author"] = summary[:3]
        article["content"] = response.xpath(
            "//div[@id='content']/p/descendant::text()"
        ).extract()

        yield article
